library(readr)
library(ggplot2)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggdark)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggpubr)
library(xgboost)
##
## Attaching package: 'xgboost'
## The following object is masked from 'package:plotly':
##
## slice
## The following object is masked from 'package:dplyr':
##
## slice
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
library(caret)
## Loading required package: lattice
library(corrplot)
## corrplot 0.92 loaded
library(ggExtra) # Load ggExtra
library(xgboost) # Load XGBoost
source("a_insights_shap_functions.r") # Load SHAP functions
library(Metrics) # Load metrics
##
## Attaching package: 'Metrics'
## The following objects are masked from 'package:caret':
##
## precision, recall
library(pROC) # Load proc
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following object is masked from 'package:Metrics':
##
## auc
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
day <- read_csv("day_approach_maskedID_timeseries.csv")
## Rows: 42766 Columns: 73
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (73): nr. sessions, total km, km Z3-4, km Z5-T1-T2, km sprinting, streng...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
week <- read_csv("week_approach_maskedID_timeseries.csv")
## Rows: 42798 Columns: 72
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (72): nr. sessions, nr. rest days, total kms, max km one day, total km Z...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Remove rows with missing or invalid values in injury column
week <- week[complete.cases(week$injury), ]
summary(week)
## nr. sessions nr. rest days total kms max km one day
## Min. : 0.000 Min. :0.000 Min. : 0.00 Min. : 0.00
## 1st Qu.: 5.000 1st Qu.:1.000 1st Qu.: 22.80 1st Qu.: 9.00
## Median : 6.000 Median :1.000 Median : 44.80 Median : 13.40
## Mean : 5.809 Mean :1.875 Mean : 49.54 Mean : 14.01
## 3rd Qu.: 7.000 3rd Qu.:3.000 3rd Qu.: 70.10 3rd Qu.: 18.30
## Max. :14.000 Max. :7.000 Max. :242.00 Max. :131.00
## total km Z3-Z4-Z5-T1-T2 nr. tough sessions (effort in Z5, T1 or T2)
## Min. : 0.000 Min. :0.0000
## 1st Qu.: 1.000 1st Qu.:0.0000
## Median : 8.000 Median :1.0000
## Mean : 9.434 Mean :0.9302
## 3rd Qu.: 14.600 3rd Qu.:2.0000
## Max. :100.000 Max. :6.0000
## nr. days with interval session total km Z3-4 max km Z3-4 one day
## Min. :0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median :2.000 Median : 0.000 Median : 0.000
## Mean :1.673 Mean : 4.859 Mean : 3.457
## 3rd Qu.:3.000 3rd Qu.: 8.000 3rd Qu.: 6.300
## Max. :7.000 Max. :79.800 Max. :75.000
## total km Z5-T1-T2 max km Z5-T1-T2 one day total hours alternative training
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 1.500 Median : 1.200 Median : 0.000
## Mean : 4.064 Mean : 2.725 Mean : 1.149
## 3rd Qu.: 6.300 3rd Qu.: 5.000 3rd Qu.: 1.500
## Max. :80.000 Max. :76.000 Max. :52.500
## nr. strength trainings avg exertion min exertion max exertion
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.1400 1st Qu.:0.0900 1st Qu.:0.1600
## Median :1.0000 Median :0.3200 Median :0.1400 Median :0.5100
## Mean :0.8156 Mean :0.3199 Mean :0.1887 Mean :0.4706
## 3rd Qu.:1.0000 3rd Qu.:0.4800 3rd Qu.:0.2700 3rd Qu.:0.7300
## Max. :9.0000 Max. :0.9800 Max. :0.9800 Max. :1.0000
## avg training success min training success max training success
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.5900 Median :0.3700 Median :0.7300
## Mean :0.4475 Mean :0.3384 Mean :0.5251
## 3rd Qu.:0.7300 3rd Qu.:0.6100 3rd Qu.:0.8400
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## avg recovery min recovery max recovery nr. sessions.1
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.000
## 1st Qu.:0.1500 1st Qu.:0.1100 1st Qu.:0.1700 1st Qu.: 5.000
## Median :0.2200 Median :0.1600 Median :0.3100 Median : 6.000
## Mean :0.2555 Mean :0.1856 Mean :0.3442 Mean : 5.808
## 3rd Qu.:0.3600 3rd Qu.:0.2500 3rd Qu.:0.5200 3rd Qu.: 7.000
## Max. :0.9000 Max. :0.9000 Max. :1.0000 Max. :14.000
## nr. rest days.1 total kms.1 max km one day.1 total km Z3-Z4-Z5-T1-T2.1
## Min. :0.000 Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.000 1st Qu.: 22.20 1st Qu.: 8.80 1st Qu.: 0.800
## Median :1.000 Median : 44.40 Median : 13.30 Median : 8.000
## Mean :1.879 Mean : 49.26 Mean : 13.92 Mean : 9.384
## 3rd Qu.:3.000 3rd Qu.: 70.00 3rd Qu.: 18.30 3rd Qu.: 14.500
## Max. :7.000 Max. :235.00 Max. :130.00 Max. :106.200
## nr. tough sessions (effort in Z5, T1 or T2).1 nr. days with interval session.1
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000
## Median :1.0000 Median :2.000
## Mean :0.9247 Mean :1.664
## 3rd Qu.:2.0000 3rd Qu.:3.000
## Max. :6.0000 Max. :7.000
## total km Z3-4.1 max km Z3-4 one day.1 total km Z5-T1-T2.1
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.000 Median : 0.000 Median : 1.400
## Mean : 4.841 Mean : 3.438 Mean : 4.022
## 3rd Qu.: 8.000 3rd Qu.: 6.300 3rd Qu.: 6.200
## Max. :85.000 Max. :75.000 Max. :80.000
## max km Z5-T1-T2 one day.1 total hours alternative training.1
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 1.000 Median : 0.000
## Mean : 2.693 Mean : 1.172
## 3rd Qu.: 5.000 3rd Qu.: 1.500
## Max. :76.000 Max. :52.500
## nr. strength trainings.1 avg exertion.1 min exertion.1 max exertion.1
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.1400 1st Qu.:0.0900 1st Qu.:0.1600
## Median :1.0000 Median :0.3200 Median :0.1400 Median :0.5100
## Mean :0.8182 Mean :0.3196 Mean :0.1881 Mean :0.4701
## 3rd Qu.:1.0000 3rd Qu.:0.4800 3rd Qu.:0.2700 3rd Qu.:0.7300
## Max. :9.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## avg training success.1 min training success.1 max training success.1
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.5900 Median :0.3800 Median :0.7300
## Mean :0.4481 Mean :0.3398 Mean :0.5254
## 3rd Qu.:0.7300 3rd Qu.:0.6100 3rd Qu.:0.8400
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## avg recovery.1 min recovery.1 max recovery.1 nr. sessions.2
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.000
## 1st Qu.:0.1500 1st Qu.:0.1100 1st Qu.:0.1700 1st Qu.: 5.000
## Median :0.2200 Median :0.1600 Median :0.3100 Median : 6.000
## Mean :0.2551 Mean :0.1851 Mean :0.3436 Mean : 5.811
## 3rd Qu.:0.3600 3rd Qu.:0.2500 3rd Qu.:0.5200 3rd Qu.: 7.000
## Max. :0.9000 Max. :0.9000 Max. :1.0000 Max. :14.000
## nr. rest days.2 total kms.2 max km one day.2 total km Z3-Z4-Z5-T1-T2.2
## Min. :0.000 Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.000 1st Qu.: 21.70 1st Qu.: 8.60 1st Qu.: 0.200
## Median :1.000 Median : 43.90 Median : 13.20 Median : 7.900
## Mean :1.884 Mean : 48.81 Mean : 13.82 Mean : 9.297
## 3rd Qu.:3.000 3rd Qu.: 69.60 3rd Qu.: 18.20 3rd Qu.: 14.500
## Max. :7.000 Max. :235.00 Max. :135.00 Max. :150.000
## nr. tough sessions (effort in Z5, T1 or T2).2 nr. days with interval session.2
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000
## Median :1.0000 Median :2.000
## Mean :0.9153 Mean :1.653
## 3rd Qu.:2.0000 3rd Qu.:3.000
## Max. :6.0000 Max. :7.000
## total km Z3-4.2 max km Z3-4 one day.2 total km Z5-T1-T2.2
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.000 Median : 0.000 Median : 1.000
## Mean : 4.793 Mean : 3.399 Mean : 3.977
## 3rd Qu.: 8.000 3rd Qu.: 6.200 3rd Qu.: 6.100
## Max. :85.000 Max. :75.000 Max. :52.200
## max km Z5-T1-T2 one day.2 total hours alternative training.2
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 1.000 Median : 0.000
## Mean : 2.665 Mean : 1.204
## 3rd Qu.: 4.900 3rd Qu.: 1.580
## Max. :30.000 Max. :67.330
## nr. strength trainings.2 avg exertion.2 min exertion.2 max exertion.2
## Min. :0.0000 Min. :0.00 Min. :0.0000 Min. :0.00
## 1st Qu.:0.0000 1st Qu.:0.14 1st Qu.:0.0900 1st Qu.:0.16
## Median :1.0000 Median :0.32 Median :0.1400 Median :0.51
## Mean :0.8244 Mean :0.32 Mean :0.1878 Mean :0.47
## 3rd Qu.:1.0000 3rd Qu.:0.49 3rd Qu.:0.2700 3rd Qu.:0.73
## Max. :9.0000 Max. :0.98 Max. :0.9800 Max. :1.00
## avg training success.2 min training success.2 max training success.2
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.5900 Median :0.3800 Median :0.7300
## Mean :0.4481 Mean :0.3396 Mean :0.5257
## 3rd Qu.:0.7300 3rd Qu.:0.6100 3rd Qu.:0.8400
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## avg recovery.2 min recovery.2 max recovery.2 Athlete ID
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.00
## 1st Qu.:0.1500 1st Qu.:0.1100 1st Qu.:0.1700 1st Qu.:20.00
## Median :0.2200 Median :0.1600 Median :0.3100 Median :34.00
## Mean :0.2551 Mean :0.1849 Mean :0.3435 Mean :34.54
## 3rd Qu.:0.3600 3rd Qu.:0.2400 3rd Qu.:0.5200 3rd Qu.:50.00
## Max. :0.9000 Max. :0.9000 Max. :1.0000 Max. :73.00
## injury rel total kms week 0_1 rel total kms week 0_2
## Min. :0.00000 Min. : 0 Min. : 0
## 1st Qu.:0.00000 1st Qu.: 1 1st Qu.: 1
## Median :0.00000 Median : 1 Median : 1
## Mean :0.01344 Mean : 440863 Mean : 901468
## 3rd Qu.:0.00000 3rd Qu.: 1 3rd Qu.: 1
## Max. :1.00000 Max. :209600000 Max. :217600000
## rel total kms week 1_2 Date
## Min. : 0 Min. : 0
## 1st Qu.: 1 1st Qu.: 437
## Median : 1 Median :1254
## Mean : 480362 Mean :1228
## 3rd Qu.: 1 3rd Qu.:1913
## Max. :209600000 Max. :2673
noninjured <-week[week$injury == 0,]
summary(noninjured)
## nr. sessions nr. rest days total kms max km one day
## Min. : 0.000 Min. :0.000 Min. : 0.00 Min. : 0.0
## 1st Qu.: 5.000 1st Qu.:1.000 1st Qu.: 22.60 1st Qu.: 9.0
## Median : 6.000 Median :1.000 Median : 44.70 Median : 13.4
## Mean : 5.801 Mean :1.882 Mean : 49.51 Mean : 14.0
## 3rd Qu.: 7.000 3rd Qu.:3.000 3rd Qu.: 70.10 3rd Qu.: 18.3
## Max. :14.000 Max. :7.000 Max. :242.00 Max. :131.0
## total km Z3-Z4-Z5-T1-T2 nr. tough sessions (effort in Z5, T1 or T2)
## Min. : 0.000 Min. :0.0000
## 1st Qu.: 1.000 1st Qu.:0.0000
## Median : 8.000 Median :1.0000
## Mean : 9.405 Mean :0.9272
## 3rd Qu.: 14.500 3rd Qu.:2.0000
## Max. :100.000 Max. :6.0000
## nr. days with interval session total km Z3-4 max km Z3-4 one day
## Min. :0.000 Min. : 0.000 Min. : 0.00
## 1st Qu.:0.000 1st Qu.: 0.000 1st Qu.: 0.00
## Median :2.000 Median : 0.000 Median : 0.00
## Mean :1.668 Mean : 4.849 Mean : 3.45
## 3rd Qu.:3.000 3rd Qu.: 8.000 3rd Qu.: 6.30
## Max. :7.000 Max. :79.800 Max. :75.00
## total km Z5-T1-T2 max km Z5-T1-T2 one day total hours alternative training
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 1.500 Median : 1.000 Median : 0.000
## Mean : 4.049 Mean : 2.717 Mean : 1.149
## 3rd Qu.: 6.300 3rd Qu.: 5.000 3rd Qu.: 1.500
## Max. :80.000 Max. :76.000 Max. :52.500
## nr. strength trainings avg exertion min exertion max exertion
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.1400 1st Qu.:0.0900 1st Qu.:0.1600
## Median :1.0000 Median :0.3200 Median :0.1400 Median :0.5100
## Mean :0.8123 Mean :0.3188 Mean :0.1882 Mean :0.4688
## 3rd Qu.:1.0000 3rd Qu.:0.4800 3rd Qu.:0.2700 3rd Qu.:0.7300
## Max. :9.0000 Max. :0.9800 Max. :0.9800 Max. :1.0000
## avg training success min training success max training success
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.5900 Median :0.3700 Median :0.7300
## Mean :0.4461 Mean :0.3376 Mean :0.5233
## 3rd Qu.:0.7300 3rd Qu.:0.6100 3rd Qu.:0.8400
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## avg recovery min recovery max recovery nr. sessions.1
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.000
## 1st Qu.:0.1500 1st Qu.:0.1100 1st Qu.:0.1700 1st Qu.: 5.000
## Median :0.2200 Median :0.1600 Median :0.3100 Median : 6.000
## Mean :0.2549 Mean :0.1854 Mean :0.3431 Mean : 5.802
## 3rd Qu.:0.3600 3rd Qu.:0.2500 3rd Qu.:0.5200 3rd Qu.: 7.000
## Max. :0.9000 Max. :0.9000 Max. :1.0000 Max. :14.000
## nr. rest days.1 total kms.1 max km one day.1 total km Z3-Z4-Z5-T1-T2.1
## Min. :0.000 Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.000 1st Qu.: 22.10 1st Qu.: 8.80 1st Qu.: 0.800
## Median :1.000 Median : 44.40 Median :13.30 Median : 8.000
## Mean :1.884 Mean : 49.23 Mean :13.91 Mean : 9.367
## 3rd Qu.:3.000 3rd Qu.: 70.00 3rd Qu.:18.30 3rd Qu.: 14.500
## Max. :7.000 Max. :235.00 Max. :90.00 Max. :106.200
## nr. tough sessions (effort in Z5, T1 or T2).1 nr. days with interval session.1
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000
## Median :1.0000 Median :2.000
## Mean :0.9225 Mean :1.661
## 3rd Qu.:2.0000 3rd Qu.:3.000
## Max. :6.0000 Max. :7.000
## total km Z3-4.1 max km Z3-4 one day.1 total km Z5-T1-T2.1
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.000 Median : 0.000 Median : 1.200
## Mean : 4.836 Mean : 3.434 Mean : 4.012
## 3rd Qu.: 8.000 3rd Qu.: 6.300 3rd Qu.: 6.200
## Max. :85.000 Max. :75.000 Max. :80.000
## max km Z5-T1-T2 one day.1 total hours alternative training.1
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 1.000 Median : 0.000
## Mean : 2.688 Mean : 1.173
## 3rd Qu.: 5.000 3rd Qu.: 1.500
## Max. :76.000 Max. :52.500
## nr. strength trainings.1 avg exertion.1 min exertion.1 max exertion.1
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.1400 1st Qu.:0.0900 1st Qu.:0.1600
## Median :1.0000 Median :0.3200 Median :0.1400 Median :0.5100
## Mean :0.8148 Mean :0.3186 Mean :0.1876 Mean :0.4685
## 3rd Qu.:1.0000 3rd Qu.:0.4800 3rd Qu.:0.2700 3rd Qu.:0.7300
## Max. :9.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## avg training success.1 min training success.1 max training success.1
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.5900 Median :0.3800 Median :0.7300
## Mean :0.4468 Mean :0.3389 Mean :0.5237
## 3rd Qu.:0.7300 3rd Qu.:0.6100 3rd Qu.:0.8400
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## avg recovery.1 min recovery.1 max recovery.1 nr. sessions.2
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.000
## 1st Qu.:0.1500 1st Qu.:0.1100 1st Qu.:0.1700 1st Qu.: 5.000
## Median :0.2200 Median :0.1600 Median :0.3100 Median : 6.000
## Mean :0.2546 Mean :0.1849 Mean :0.3427 Mean : 5.807
## 3rd Qu.:0.3600 3rd Qu.:0.2400 3rd Qu.:0.5100 3rd Qu.: 7.000
## Max. :0.9000 Max. :0.9000 Max. :1.0000 Max. :14.000
## nr. rest days.2 total kms.2 max km one day.2 total km Z3-Z4-Z5-T1-T2.2
## Min. :0.000 Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.000 1st Qu.: 21.60 1st Qu.: 8.60 1st Qu.: 0.000
## Median :1.000 Median : 43.90 Median : 13.20 Median : 7.900
## Mean :1.887 Mean : 48.82 Mean : 13.81 Mean : 9.291
## 3rd Qu.:3.000 3rd Qu.: 69.70 3rd Qu.: 18.20 3rd Qu.: 14.500
## Max. :7.000 Max. :235.00 Max. :135.00 Max. :150.000
## nr. tough sessions (effort in Z5, T1 or T2).2 nr. days with interval session.2
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000
## Median :1.0000 Median :2.000
## Mean :0.9143 Mean :1.652
## 3rd Qu.:2.0000 3rd Qu.:3.000
## Max. :6.0000 Max. :7.000
## total km Z3-4.2 max km Z3-4 one day.2 total km Z5-T1-T2.2
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.000 Median : 0.000 Median : 1.000
## Mean : 4.793 Mean : 3.398 Mean : 3.973
## 3rd Qu.: 8.000 3rd Qu.: 6.200 3rd Qu.: 6.100
## Max. :85.000 Max. :75.000 Max. :52.200
## max km Z5-T1-T2 one day.2 total hours alternative training.2
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 1.000 Median : 0.000
## Mean : 2.662 Mean : 1.204
## 3rd Qu.: 4.900 3rd Qu.: 1.580
## Max. :30.000 Max. :67.330
## nr. strength trainings.2 avg exertion.2 min exertion.2 max exertion.2
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.1400 1st Qu.:0.0900 1st Qu.:0.1600
## Median :1.0000 Median :0.3200 Median :0.1400 Median :0.5100
## Mean :0.8223 Mean :0.3191 Mean :0.1873 Mean :0.4686
## 3rd Qu.:1.0000 3rd Qu.:0.4800 3rd Qu.:0.2700 3rd Qu.:0.7300
## Max. :9.0000 Max. :0.9800 Max. :0.9800 Max. :1.0000
## avg training success.2 min training success.2 max training success.2
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.5900 Median :0.3700 Median :0.7300
## Mean :0.4469 Mean :0.3388 Mean :0.5241
## 3rd Qu.:0.7300 3rd Qu.:0.6100 3rd Qu.:0.8400
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## avg recovery.2 min recovery.2 max recovery.2 Athlete ID injury
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.00 Min. :0
## 1st Qu.:0.1500 1st Qu.:0.1100 1st Qu.:0.1700 1st Qu.:20.00 1st Qu.:0
## Median :0.2200 Median :0.1600 Median :0.3100 Median :34.00 Median :0
## Mean :0.2546 Mean :0.1847 Mean :0.3426 Mean :34.53 Mean :0
## 3rd Qu.:0.3600 3rd Qu.:0.2400 3rd Qu.:0.5100 3rd Qu.:50.00 3rd Qu.:0
## Max. :0.9000 Max. :0.9000 Max. :1.0000 Max. :73.00 Max. :0
## rel total kms week 0_1 rel total kms week 0_2 rel total kms week 1_2
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 1 1st Qu.: 1 1st Qu.: 1
## Median : 1 Median : 1 Median : 1
## Mean : 442719 Mean : 899340 Mean : 480452
## 3rd Qu.: 1 3rd Qu.: 1 3rd Qu.: 1
## Max. :209600000 Max. :217600000 Max. :209600000
## Date
## Min. : 0
## 1st Qu.: 429
## Median :1251
## Mean :1225
## 3rd Qu.:1909
## Max. :2652
injured <- week[week$injury == 1, ]
summary(injured)
## nr. sessions nr. rest days total kms max km one day
## Min. : 0.000 Min. :0.000 Min. : 0.00 Min. : 0.00
## 1st Qu.: 5.000 1st Qu.:0.000 1st Qu.: 30.70 1st Qu.:11.00
## Median : 6.000 Median :1.000 Median : 49.00 Median :14.30
## Mean : 6.435 Mean :1.353 Mean : 51.86 Mean :14.77
## 3rd Qu.: 8.000 3rd Qu.:2.000 3rd Qu.: 69.00 3rd Qu.:18.00
## Max. :13.000 Max. :7.000 Max. :174.00 Max. :57.00
## total km Z3-Z4-Z5-T1-T2 nr. tough sessions (effort in Z5, T1 or T2)
## Min. : 0.00 Min. :0.000
## 1st Qu.: 5.90 1st Qu.:0.000
## Median :10.90 Median :1.000
## Mean :11.55 Mean :1.146
## 3rd Qu.:16.50 3rd Qu.:2.000
## Max. :45.50 Max. :4.000
## nr. days with interval session total km Z3-4 max km Z3-4 one day
## Min. :0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:1.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median :2.000 Median : 3.800 Median : 3.500
## Mean :1.991 Mean : 5.593 Mean : 3.939
## 3rd Qu.:3.000 3rd Qu.: 9.000 3rd Qu.: 7.000
## Max. :6.000 Max. :35.400 Max. :21.100
## total km Z5-T1-T2 max km Z5-T1-T2 one day total hours alternative training
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 4.000 Median : 3.600 Median : 0.000
## Mean : 5.179 Mean : 3.358 Mean : 1.142
## 3rd Qu.: 8.150 3rd Qu.: 6.000 3rd Qu.: 1.500
## Max. :32.300 Max. :15.000 Max. :27.420
## nr. strength trainings avg exertion min exertion max exertion
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.2400 1st Qu.:0.1000 1st Qu.:0.4100
## Median :1.000 Median :0.4300 Median :0.1800 Median :0.6900
## Mean :1.061 Mean :0.4043 Mean :0.2213 Mean :0.6029
## 3rd Qu.:2.000 3rd Qu.:0.5600 3rd Qu.:0.3100 3rd Qu.:0.8050
## Max. :5.000 Max. :0.8500 Max. :0.7000 Max. :1.0000
## avg training success min training success max training success
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.5100 1st Qu.:0.1450 1st Qu.:0.6500
## Median :0.6600 Median :0.4600 Median :0.7800
## Mean :0.5507 Mean :0.3977 Mean :0.6539
## 3rd Qu.:0.7400 3rd Qu.:0.6200 3rd Qu.:0.8700
## Max. :0.9800 Max. :0.9600 Max. :1.0000
## avg recovery min recovery max recovery nr. sessions.1
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.000
## 1st Qu.:0.1800 1st Qu.:0.1200 1st Qu.:0.2150 1st Qu.: 5.000
## Median :0.2800 Median :0.1800 Median :0.4100 Median : 6.000
## Mean :0.2997 Mean :0.2031 Mean :0.4254 Mean : 6.294
## 3rd Qu.:0.4000 3rd Qu.:0.2700 3rd Qu.:0.5900 3rd Qu.: 8.000
## Max. :0.8600 Max. :0.7000 Max. :1.0000 Max. :14.000
## nr. rest days.1 total kms.1 max km one day.1 total km Z3-Z4-Z5-T1-T2.1
## Min. :0.000 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.:1.000 1st Qu.: 30.55 1st Qu.: 10.50 1st Qu.: 4.80
## Median :1.000 Median : 47.30 Median : 14.00 Median :10.00
## Mean :1.482 Mean : 51.19 Mean : 14.73 Mean :10.65
## 3rd Qu.:2.000 3rd Qu.: 66.85 3rd Qu.: 18.00 3rd Qu.:15.50
## Max. :7.000 Max. :202.00 Max. :130.00 Max. :54.60
## nr. tough sessions (effort in Z5, T1 or T2).1 nr. days with interval session.1
## Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:1.000
## Median :1.000 Median :2.000
## Mean :1.087 Mean :1.875
## 3rd Qu.:2.000 3rd Qu.:3.000
## Max. :4.000 Max. :5.000
## total km Z3-4.1 max km Z3-4 one day.1 total km Z5-T1-T2.1
## Min. : 0.00 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.00 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 2.50 Median : 2.400 Median : 3.200
## Mean : 5.22 Mean : 3.705 Mean : 4.732
## 3rd Qu.: 8.50 3rd Qu.: 6.500 3rd Qu.: 7.850
## Max. :34.70 Max. :21.100 Max. :42.100
## max km Z5-T1-T2 one day.1 total hours alternative training.1
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 3.000 Median : 0.000
## Mean : 3.075 Mean : 1.057
## 3rd Qu.: 5.600 3rd Qu.: 1.500
## Max. :21.100 Max. :12.000
## nr. strength trainings.1 avg exertion.1 min exertion.1 max exertion.1
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.2150 1st Qu.:0.1100 1st Qu.:0.3400
## Median :1.000 Median :0.4200 Median :0.1800 Median :0.6800
## Mean :1.071 Mean :0.3946 Mean :0.2211 Mean :0.5871
## 3rd Qu.:2.000 3rd Qu.:0.5400 3rd Qu.:0.3100 3rd Qu.:0.8000
## Max. :5.000 Max. :0.9100 Max. :0.8800 Max. :1.0000
## avg training success.1 min training success.1 max training success.1
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.4800 1st Qu.:0.1350 1st Qu.:0.6350
## Median :0.6500 Median :0.4600 Median :0.7800
## Mean :0.5454 Mean :0.4016 Mean :0.6457
## 3rd Qu.:0.7400 3rd Qu.:0.6200 3rd Qu.:0.8600
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## avg recovery.1 min recovery.1 max recovery.1 nr. sessions.2
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.00
## 1st Qu.:0.1700 1st Qu.:0.1300 1st Qu.:0.2000 1st Qu.: 5.00
## Median :0.2800 Median :0.1800 Median :0.3800 Median : 6.00
## Mean :0.2953 Mean :0.2029 Mean :0.4123 Mean : 6.07
## 3rd Qu.:0.4000 3rd Qu.:0.2700 3rd Qu.:0.5900 3rd Qu.: 7.00
## Max. :0.7300 Max. :0.6200 Max. :1.0000 Max. :14.00
## nr. rest days.2 total kms.2 max km one day.2 total km Z3-Z4-Z5-T1-T2.2
## Min. :0.000 Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.000 1st Qu.: 26.90 1st Qu.: 9.75 1st Qu.: 3.000
## Median :1.000 Median : 42.50 Median :13.40 Median : 9.000
## Mean :1.652 Mean : 47.69 Mean :13.91 Mean : 9.737
## 3rd Qu.:2.000 3rd Qu.: 66.80 3rd Qu.:17.75 3rd Qu.:14.750
## Max. :7.000 Max. :191.00 Max. :52.90 Max. :59.800
## nr. tough sessions (effort in Z5, T1 or T2).2 nr. days with interval session.2
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:1.000
## Median :1.0000 Median :2.000
## Mean :0.9913 Mean :1.729
## 3rd Qu.:2.0000 3rd Qu.:3.000
## Max. :4.0000 Max. :5.000
## total km Z3-4.2 max km Z3-4 one day.2 total km Z5-T1-T2.2
## Min. : 0.000 Min. : 0.00 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.000
## Median : 0.000 Median : 0.00 Median : 3.000
## Mean : 4.818 Mean : 3.48 Mean : 4.325
## 3rd Qu.: 8.000 3rd Qu.: 6.50 3rd Qu.: 6.550
## Max. :39.000 Max. :30.00 Max. :30.000
## max km Z5-T1-T2 one day.2 total hours alternative training.2
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 2.500 Median : 0.000
## Mean : 2.881 Mean : 1.212
## 3rd Qu.: 5.000 3rd Qu.: 1.615
## Max. :20.000 Max. :21.780
## nr. strength trainings.2 avg exertion.2 min exertion.2 max exertion.2
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.1800 1st Qu.:0.1000 1st Qu.:0.2800
## Median :1.0000 Median :0.4200 Median :0.1900 Median :0.6800
## Mean :0.9791 Mean :0.3873 Mean :0.2203 Mean :0.5748
## 3rd Qu.:2.0000 3rd Qu.:0.5500 3rd Qu.:0.3200 3rd Qu.:0.7900
## Max. :6.0000 Max. :0.7800 Max. :0.7300 Max. :1.0000
## avg training success.2 min training success.2 max training success.2
## Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.480 1st Qu.:0.115 1st Qu.:0.6300
## Median :0.650 Median :0.470 Median :0.7900
## Mean :0.541 Mean :0.397 Mean :0.6412
## 3rd Qu.:0.740 3rd Qu.:0.620 3rd Qu.:0.8700
## Max. :1.000 Max. :1.000 Max. :1.0000
## avg recovery.2 min recovery.2 max recovery.2 Athlete ID injury
## Min. :0.0000 Min. :0.00 Min. :0.0000 Min. : 0.00 Min. :1
## 1st Qu.:0.1700 1st Qu.:0.12 1st Qu.:0.1950 1st Qu.:23.00 1st Qu.:1
## Median :0.2800 Median :0.17 Median :0.4000 Median :36.00 Median :1
## Mean :0.2916 Mean :0.20 Mean :0.4076 Mean :35.42 Mean :1
## 3rd Qu.:0.4000 3rd Qu.:0.27 3rd Qu.:0.5700 3rd Qu.:50.00 3rd Qu.:1
## Max. :0.7500 Max. :0.69 Max. :1.0000 Max. :71.00 Max. :1
## rel total kms week 0_1 rel total kms week 0_2 rel total kms week 1_2
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 1 1st Qu.: 1 1st Qu.: 1
## Median : 1 Median : 1 Median : 1
## Mean : 304523 Mean : 1057740 Mean : 473740
## 3rd Qu.: 1 3rd Qu.: 2 3rd Qu.: 1
## Max. :47000000 Max. :100200000 Max. :53100000
## Date
## Min. : 246.0
## 1st Qu.: 765.5
## Median :1485.0
## Mean :1431.6
## 3rd Qu.:2075.5
## Max. :2673.0
injured$injury = (as.factor(injured$injury))
levels(injured$injury) <- c("Injured")
noninjured$injury = (as.factor(noninjured$injury))
levels(noninjured$injury) <- c("NotInjured")
injury_dist <- week %>%
group_by(`Athlete ID`, injury) %>%
summarise(count = n()) %>%
spread(injury, count, fill = 0)
## `summarise()` has grouped output by 'Athlete ID'. You can override using the
## `.groups` argument.
injury_dist
## # A tibble: 74 × 3
## # Groups: Athlete ID [74]
## `Athlete ID` `0` `1`
## <dbl> <dbl> <dbl>
## 1 0 303 7
## 2 1 459 2
## 3 2 1287 4
## 4 3 341 3
## 5 4 674 7
## 6 5 201 1
## 7 6 530 4
## 8 7 299 5
## 9 8 261 7
## 10 9 1210 22
## # … with 64 more rows
colnames(injury_dist) <- c("Athlete ID", "Not Injured", "Injured")
#p <- ggplot(injury_dist, aes(x=`Athlete ID`, fill=Injured))+ # Set dataset and aesthetics
# geom_density(alpha = 0.3, fill='red3')
#p
injury_sum <- week %>%
subset(week$injury==1,)
injury_sum <- injury_sum %>%
group_by(`Athlete ID`, injury) %>%
mutate(Freq = n())
injury_sum
## # A tibble: 575 × 73
## # Groups: Athlete ID, injury [61]
## nr. session…¹ nr. r…² total…³ max k…⁴ total…⁵ nr. t…⁶ nr. d…⁷ total…⁸ max k…⁹
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 6 1 34.9 14.5 10.4 0 2 9.5 5
## 2 6 1 33.6 15.2 15.3 1 2 12.4 6.4
## 3 1 6 7 7 0 0 0 0 0
## 4 7 0 47.2 12.5 14.7 0 3 14 6
## 5 9 1 29.4 7.5 3.9 3 3 0 0
## 6 7 0 28.5 6.9 10.8 1 4 10 4.5
## 7 3 4 23.4 12 1.9 1 2 1.5 1.5
## 8 6 1 117 22 20 0 2 20 11
## 9 6 1 114 30 10 1 1 0 0
## 10 6 2 78 24 15 1 2 9 9
## # … with 565 more rows, 64 more variables: `total km Z5-T1-T2` <dbl>,
## # `max km Z5-T1-T2 one day` <dbl>, `total hours alternative training` <dbl>,
## # `nr. strength trainings` <dbl>, `avg exertion` <dbl>, `min exertion` <dbl>,
## # `max exertion` <dbl>, `avg training success` <dbl>,
## # `min training success` <dbl>, `max training success` <dbl>,
## # `avg recovery` <dbl>, `min recovery` <dbl>, `max recovery` <dbl>,
## # `nr. sessions.1` <dbl>, `nr. rest days.1` <dbl>, `total kms.1` <dbl>, …
ggplot(injury_sum, aes(x=`Athlete ID`, fill=Freq))+
geom_bar()
## Warning: The following aesthetics were dropped during statistical transformation: fill
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
# Create plot
p <- ggplot(injury_sum, aes(x = `max exertion`, fill = injury)) + # Set dataset and aesthetics
geom_density(alpha = 0.3, fill='red3') +
labs(x = "Max Exertion", y = "Injuries", # Set labels for plot
title = "Injuries by Max Exertion") +
theme(axis.line = element_line(colour = "black"), # Set axis line as black
panel.grid.major = element_blank(), # Remove grid
panel.grid.minor = element_blank(), # Remove grid
panel.border = element_blank(), # Remove grid
panel.background = element_blank()) + # Remove grid
dark_theme_bw() # Turn theme to dark mode
## Inverted geom defaults of fill and color/colour.
## To change them back, use invert_geom_defaults().
p1 <- ggplot(injury_sum, aes(x = `avg recovery`, fill = injury)) + # Set dataset and aesthetics
geom_density(alpha = 0.3, fill ="blue2") +
labs(x = "Average Recovery", y = "Injuries", # Set labels for plot
title = "Injuries by Average Recovery") +
theme(axis.line = element_line(colour = "black"), # Set axis line as black
panel.grid.major = element_blank(), # Remove grid
panel.grid.minor = element_blank(), # Remove grid
panel.border = element_blank(), # Remove grid
panel.background = element_blank()) + # Remove grid
dark_theme_bw() # Turn theme to dark mode
p <- ggarrange(p, p1, ncol = 2, widths = c(3,3), common.legend = TRUE, legend = "bottom")
print(p)
For Weeks that an Athlete was Injured this was their average exertion
# Create a ggplot boxplot
p <- ggplot(week, aes(x = injury, y = `avg exertion`, fill=injury)) +
geom_boxplot()
p
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## Warning: The following aesthetics were dropped during statistical transformation: fill
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
# Create an interactive plotly version of the ggplot
ggplotly(p) %>%
layout(
title = "Boxplot of Average Exertion by Injury Status",
xaxis = list(title = "Injury Status"),
yaxis = list(title = "Average Exertion")
)
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## The following aesthetics were dropped during statistical transformation: fill
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
p <- week %>%
ggplot(aes(x = Date)) +
geom_density(aes( fill = injury), alpha = 0.3) +
scale_fill_manual(values = c("red2", "green")) +
labs(title = "Bar Chart and Density Plot of Average Exertion by Injury Status",
x = "Date", y = "Density", fill = "Injury Status")
p
## Warning: The following aesthetics were dropped during statistical transformation: fill
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
summary(week)
## nr. sessions nr. rest days total kms max km one day
## Min. : 0.000 Min. :0.000 Min. : 0.00 Min. : 0.00
## 1st Qu.: 5.000 1st Qu.:1.000 1st Qu.: 22.80 1st Qu.: 9.00
## Median : 6.000 Median :1.000 Median : 44.80 Median : 13.40
## Mean : 5.809 Mean :1.875 Mean : 49.54 Mean : 14.01
## 3rd Qu.: 7.000 3rd Qu.:3.000 3rd Qu.: 70.10 3rd Qu.: 18.30
## Max. :14.000 Max. :7.000 Max. :242.00 Max. :131.00
## total km Z3-Z4-Z5-T1-T2 nr. tough sessions (effort in Z5, T1 or T2)
## Min. : 0.000 Min. :0.0000
## 1st Qu.: 1.000 1st Qu.:0.0000
## Median : 8.000 Median :1.0000
## Mean : 9.434 Mean :0.9302
## 3rd Qu.: 14.600 3rd Qu.:2.0000
## Max. :100.000 Max. :6.0000
## nr. days with interval session total km Z3-4 max km Z3-4 one day
## Min. :0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median :2.000 Median : 0.000 Median : 0.000
## Mean :1.673 Mean : 4.859 Mean : 3.457
## 3rd Qu.:3.000 3rd Qu.: 8.000 3rd Qu.: 6.300
## Max. :7.000 Max. :79.800 Max. :75.000
## total km Z5-T1-T2 max km Z5-T1-T2 one day total hours alternative training
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 1.500 Median : 1.200 Median : 0.000
## Mean : 4.064 Mean : 2.725 Mean : 1.149
## 3rd Qu.: 6.300 3rd Qu.: 5.000 3rd Qu.: 1.500
## Max. :80.000 Max. :76.000 Max. :52.500
## nr. strength trainings avg exertion min exertion max exertion
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.1400 1st Qu.:0.0900 1st Qu.:0.1600
## Median :1.0000 Median :0.3200 Median :0.1400 Median :0.5100
## Mean :0.8156 Mean :0.3199 Mean :0.1887 Mean :0.4706
## 3rd Qu.:1.0000 3rd Qu.:0.4800 3rd Qu.:0.2700 3rd Qu.:0.7300
## Max. :9.0000 Max. :0.9800 Max. :0.9800 Max. :1.0000
## avg training success min training success max training success
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.5900 Median :0.3700 Median :0.7300
## Mean :0.4475 Mean :0.3384 Mean :0.5251
## 3rd Qu.:0.7300 3rd Qu.:0.6100 3rd Qu.:0.8400
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## avg recovery min recovery max recovery nr. sessions.1
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.000
## 1st Qu.:0.1500 1st Qu.:0.1100 1st Qu.:0.1700 1st Qu.: 5.000
## Median :0.2200 Median :0.1600 Median :0.3100 Median : 6.000
## Mean :0.2555 Mean :0.1856 Mean :0.3442 Mean : 5.808
## 3rd Qu.:0.3600 3rd Qu.:0.2500 3rd Qu.:0.5200 3rd Qu.: 7.000
## Max. :0.9000 Max. :0.9000 Max. :1.0000 Max. :14.000
## nr. rest days.1 total kms.1 max km one day.1 total km Z3-Z4-Z5-T1-T2.1
## Min. :0.000 Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.000 1st Qu.: 22.20 1st Qu.: 8.80 1st Qu.: 0.800
## Median :1.000 Median : 44.40 Median : 13.30 Median : 8.000
## Mean :1.879 Mean : 49.26 Mean : 13.92 Mean : 9.384
## 3rd Qu.:3.000 3rd Qu.: 70.00 3rd Qu.: 18.30 3rd Qu.: 14.500
## Max. :7.000 Max. :235.00 Max. :130.00 Max. :106.200
## nr. tough sessions (effort in Z5, T1 or T2).1 nr. days with interval session.1
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000
## Median :1.0000 Median :2.000
## Mean :0.9247 Mean :1.664
## 3rd Qu.:2.0000 3rd Qu.:3.000
## Max. :6.0000 Max. :7.000
## total km Z3-4.1 max km Z3-4 one day.1 total km Z5-T1-T2.1
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.000 Median : 0.000 Median : 1.400
## Mean : 4.841 Mean : 3.438 Mean : 4.022
## 3rd Qu.: 8.000 3rd Qu.: 6.300 3rd Qu.: 6.200
## Max. :85.000 Max. :75.000 Max. :80.000
## max km Z5-T1-T2 one day.1 total hours alternative training.1
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 1.000 Median : 0.000
## Mean : 2.693 Mean : 1.172
## 3rd Qu.: 5.000 3rd Qu.: 1.500
## Max. :76.000 Max. :52.500
## nr. strength trainings.1 avg exertion.1 min exertion.1 max exertion.1
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.1400 1st Qu.:0.0900 1st Qu.:0.1600
## Median :1.0000 Median :0.3200 Median :0.1400 Median :0.5100
## Mean :0.8182 Mean :0.3196 Mean :0.1881 Mean :0.4701
## 3rd Qu.:1.0000 3rd Qu.:0.4800 3rd Qu.:0.2700 3rd Qu.:0.7300
## Max. :9.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## avg training success.1 min training success.1 max training success.1
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.5900 Median :0.3800 Median :0.7300
## Mean :0.4481 Mean :0.3398 Mean :0.5254
## 3rd Qu.:0.7300 3rd Qu.:0.6100 3rd Qu.:0.8400
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## avg recovery.1 min recovery.1 max recovery.1 nr. sessions.2
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.000
## 1st Qu.:0.1500 1st Qu.:0.1100 1st Qu.:0.1700 1st Qu.: 5.000
## Median :0.2200 Median :0.1600 Median :0.3100 Median : 6.000
## Mean :0.2551 Mean :0.1851 Mean :0.3436 Mean : 5.811
## 3rd Qu.:0.3600 3rd Qu.:0.2500 3rd Qu.:0.5200 3rd Qu.: 7.000
## Max. :0.9000 Max. :0.9000 Max. :1.0000 Max. :14.000
## nr. rest days.2 total kms.2 max km one day.2 total km Z3-Z4-Z5-T1-T2.2
## Min. :0.000 Min. : 0.00 Min. : 0.00 Min. : 0.000
## 1st Qu.:1.000 1st Qu.: 21.70 1st Qu.: 8.60 1st Qu.: 0.200
## Median :1.000 Median : 43.90 Median : 13.20 Median : 7.900
## Mean :1.884 Mean : 48.81 Mean : 13.82 Mean : 9.297
## 3rd Qu.:3.000 3rd Qu.: 69.60 3rd Qu.: 18.20 3rd Qu.: 14.500
## Max. :7.000 Max. :235.00 Max. :135.00 Max. :150.000
## nr. tough sessions (effort in Z5, T1 or T2).2 nr. days with interval session.2
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000
## Median :1.0000 Median :2.000
## Mean :0.9153 Mean :1.653
## 3rd Qu.:2.0000 3rd Qu.:3.000
## Max. :6.0000 Max. :7.000
## total km Z3-4.2 max km Z3-4 one day.2 total km Z5-T1-T2.2
## Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 0.000 Median : 0.000 Median : 1.000
## Mean : 4.793 Mean : 3.399 Mean : 3.977
## 3rd Qu.: 8.000 3rd Qu.: 6.200 3rd Qu.: 6.100
## Max. :85.000 Max. :75.000 Max. :52.200
## max km Z5-T1-T2 one day.2 total hours alternative training.2
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 1.000 Median : 0.000
## Mean : 2.665 Mean : 1.204
## 3rd Qu.: 4.900 3rd Qu.: 1.580
## Max. :30.000 Max. :67.330
## nr. strength trainings.2 avg exertion.2 min exertion.2 max exertion.2
## Min. :0.0000 Min. :0.00 Min. :0.0000 Min. :0.00
## 1st Qu.:0.0000 1st Qu.:0.14 1st Qu.:0.0900 1st Qu.:0.16
## Median :1.0000 Median :0.32 Median :0.1400 Median :0.51
## Mean :0.8244 Mean :0.32 Mean :0.1878 Mean :0.47
## 3rd Qu.:1.0000 3rd Qu.:0.49 3rd Qu.:0.2700 3rd Qu.:0.73
## Max. :9.0000 Max. :0.98 Max. :0.9800 Max. :1.00
## avg training success.2 min training success.2 max training success.2
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.5900 Median :0.3800 Median :0.7300
## Mean :0.4481 Mean :0.3396 Mean :0.5257
## 3rd Qu.:0.7300 3rd Qu.:0.6100 3rd Qu.:0.8400
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## avg recovery.2 min recovery.2 max recovery.2 Athlete ID
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.00
## 1st Qu.:0.1500 1st Qu.:0.1100 1st Qu.:0.1700 1st Qu.:20.00
## Median :0.2200 Median :0.1600 Median :0.3100 Median :34.00
## Mean :0.2551 Mean :0.1849 Mean :0.3435 Mean :34.54
## 3rd Qu.:0.3600 3rd Qu.:0.2400 3rd Qu.:0.5200 3rd Qu.:50.00
## Max. :0.9000 Max. :0.9000 Max. :1.0000 Max. :73.00
## injury rel total kms week 0_1 rel total kms week 0_2
## Min. :0.00000 Min. : 0 Min. : 0
## 1st Qu.:0.00000 1st Qu.: 1 1st Qu.: 1
## Median :0.00000 Median : 1 Median : 1
## Mean :0.01344 Mean : 440863 Mean : 901468
## 3rd Qu.:0.00000 3rd Qu.: 1 3rd Qu.: 1
## Max. :1.00000 Max. :209600000 Max. :217600000
## rel total kms week 1_2 Date
## Min. : 0 Min. : 0
## 1st Qu.: 1 1st Qu.: 437
## Median : 1 Median :1254
## Mean : 480362 Mean :1228
## 3rd Qu.: 1 3rd Qu.:1913
## Max. :209600000 Max. :2673
# distance sum over 3 weeks
# week 0-2 (Distance in week 0 and the average across 1 and 2)
ac_7_21 <- week$`total kms`/((week$`total kms.1` + week$`total kms.2`)/2)
ac_7_21[is.infinite(ac_7_21)] <- NA
week$ac_7_21 <- ac_7_21
p <- week %>%
ggplot(aes(x = ac_7_21)) +
geom_density(aes( fill = factor(injury)), alpha = 0.3) +
scale_fill_manual(values = c("red2", "green")) +
labs(title = "Bar Chart and Density Plot of Average Exertion by Injury Status",
x = "Date", y = "Density", fill = "Injury Status") +
xlim(0,3)
p
## Warning: Removed 4726 rows containing non-finite values (`stat_density()`).
ae_7_21 <- week$`avg exertion`/((week$`avg exertion.1` + week$`avg exertion.2`)/2)
ae_7_21[is.infinite(ae_7_21)] <- NA
exer_v_rec_7_21 <- week$`max exertion`/((week$`avg recovery.1`+ week$`avg recovery.2`)/2) ## Exertion 0 / recovery 1 over 3 weeks
exer_v_rec_7_21[is.infinite(exer_v_rec_7_21)] <- NA
er_7_21 <- week$`avg exertion`/((week$`avg recovery.1` + week$`avg recovery.2`)/2)
er_7_21[is.infinite(er_7_21)] <- NA
rec_maxexer_7_21<- week$`avg recovery`/((week$`max exertion.1` + week$`max exertion.2`)/2)
rec_maxexer_7_21[is.infinite(rec_maxexer_7_21)] <- NA
week$ae_7_21 <- ae_7_21
threshold <- 2.25
week$ae_7_21 <- ifelse(week$ae_7_21 > threshold, mean(week$ae_7_21), week$ae_7_21)
summary(week$ae_7_21)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.8966 1.0000 0.9989 1.1034 2.2500 2857
week$exer_v_rec_7_21 <- exer_v_rec_7_21
threshold <- 2.25
week$exer_v_rec_7_21 <- ifelse(week$exer_v_rec_7_21 > threshold, mean(week$exer_v_rec_7_21), week$exer_v_rec_7_21)
summary(week$exer_v_rec_7_21)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 1.032 1.308 1.313 1.636 2.250 13929
week$er_7_21 <- er_7_21
threshold <- 2.25
week$er_7_21 <- ifelse(week$er_7_21 > threshold, mean(week$er_7_21), week$er_7_21)
summary(week$er_7_21)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 0.875 1.080 1.125 1.368 2.250 6143
week$rec_maxexer_7_21 <- rec_maxexer_7_21
threshold <- 2.25
week$rec_maxexer_7_21 <- ifelse(week$rec_maxexer_7_21 > threshold, mean(week$rec_maxexer_7_21), week$rec_maxexer_7_21)
summary(week$rec_maxexer_7_21)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.4051 0.6282 0.6550 0.8485 2.2500 2681
model <- week[,c(1:3, 12:14, 16, 17, 20, 68, 73:76)]
model <- na.omit(model)
full_model <- glm(injury ~ ., data = model, family = binomial())
summary(full_model)
##
## Call:
## glm(formula = injury ~ ., family = binomial(), data = model)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3103 -0.1743 -0.1444 -0.1230 3.5424
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.58411 0.57505 -6.233 4.59e-10 ***
## `nr. sessions` -0.05758 0.06873 -0.838 0.402170
## `nr. rest days` -0.28656 0.08330 -3.440 0.000582 ***
## `total kms` -0.00437 0.00265 -1.649 0.099152 .
## `total hours alternative training` -0.11027 0.04085 -2.699 0.006948 **
## `nr. strength trainings` 0.06048 0.06811 0.888 0.374496
## `avg exertion` 2.01601 2.43478 0.828 0.407667
## `max exertion` 0.55473 1.79439 0.309 0.757208
## `avg training success` -0.23085 0.31246 -0.739 0.460019
## `avg recovery` -0.81802 0.77478 -1.056 0.291061
## ac_7_21 -0.03031 0.06297 -0.481 0.630315
## ae_7_21 0.07275 0.33287 0.219 0.827005
## exer_v_rec_7_21 0.23017 0.71383 0.322 0.747112
## er_7_21 -0.67293 0.95695 -0.703 0.481931
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3558.7 on 27553 degrees of freedom
## Residual deviance: 3478.9 on 27540 degrees of freedom
## AIC: 3506.9
##
## Number of Fisher Scoring iterations: 7
step_model <- stepAIC(full_model, direction = "forward", scope = list(lower = ~1, upper = ~.), trace = FALSE)
summary(step_model)
##
## Call:
## glm(formula = injury ~ `nr. sessions` + `nr. rest days` + `total kms` +
## `total hours alternative training` + `nr. strength trainings` +
## `avg exertion` + `max exertion` + `avg training success` +
## `avg recovery` + ac_7_21 + ae_7_21 + exer_v_rec_7_21 + er_7_21,
## family = binomial(), data = model)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.3103 -0.1743 -0.1444 -0.1230 3.5424
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.58411 0.57505 -6.233 4.59e-10 ***
## `nr. sessions` -0.05758 0.06873 -0.838 0.402170
## `nr. rest days` -0.28656 0.08330 -3.440 0.000582 ***
## `total kms` -0.00437 0.00265 -1.649 0.099152 .
## `total hours alternative training` -0.11027 0.04085 -2.699 0.006948 **
## `nr. strength trainings` 0.06048 0.06811 0.888 0.374496
## `avg exertion` 2.01601 2.43478 0.828 0.407667
## `max exertion` 0.55473 1.79439 0.309 0.757208
## `avg training success` -0.23085 0.31246 -0.739 0.460019
## `avg recovery` -0.81802 0.77478 -1.056 0.291061
## ac_7_21 -0.03031 0.06297 -0.481 0.630315
## ae_7_21 0.07275 0.33287 0.219 0.827005
## exer_v_rec_7_21 0.23017 0.71383 0.322 0.747112
## er_7_21 -0.67293 0.95695 -0.703 0.481931
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3558.7 on 27553 degrees of freedom
## Residual deviance: 3478.9 on 27540 degrees of freedom
## AIC: 3506.9
##
## Number of Fisher Scoring iterations: 7
Looks as if our current percentage of injuries as opposed to training days is 1.42 %
training_days <- sum(week$`nr. sessions` > 0)
sum(as.numeric(week$injury)) / training_days
## [1] 0.01419087
summary(as.factor(week$injury))
## 0 1
## 42223 575
athlete <- week %>%
group_by(`Athlete ID`) %>%
summarise(Injuries = sum(as.numeric(injury))) %>%
arrange(desc(Injuries)) %>%
head(10)
athlete
## # A tibble: 10 × 2
## `Athlete ID` Injuries
## <dbl> <dbl>
## 1 26 35
## 2 29 33
## 3 42 32
## 4 23 24
## 5 9 22
## 6 27 22
## 7 36 22
## 8 22 21
## 9 38 18
## 10 41 18
Injury_Plot <- ggplot(athlete, mapping = aes(x = Injuries)) +
geom_density(alpha=0.5, fill="maroon") +
labs(x = "Total Injuries",
title = "Distribution of Injuries among Athletes")
Injury_Plot
athlete1 <- week[week$`Athlete ID` == "26",]
# Create plot
Athlete_Workloads <- ggplot(athlete1, # Set data
aes(x = `Date`, y = `avg exertion`, color = factor(injury))) + # Set aesthetics
geom_point(alpha = 0.5) + # Set geom_point for scatter plot
labs(x = "Days", # Set labels
title = "Athlete 26 Injuries",
fill = "Injuries") +
theme_bw() +
theme(axis.line = element_line(colour = "black"), # Set axis line as black
panel.grid.major = element_blank(), # Remove grid
panel.grid.minor = element_blank(), # Remove grid
panel.border = element_blank(), # Remove grid
panel.background = element_blank()) + # Remove grid
scale_color_manual(values = c("0" = "blue", "1" = "red"), # Set color values
labels = c("0" = "Healthy", "1" = "Injured"))
# Generate graph
ggMarginal(Athlete_Workloads , groupFill = TRUE,
type = "histogram")
pivotdat <- pivot_longer(athlete1[, c("min recovery", "avg recovery", "max recovery", "min exertion", "avg exertion", "max exertion", "min training success", "avg training success", "max training success", "Athlete ID", "injury")],
!c("Athlete ID", "injury")) # Set variables to use as ID
pivotdatall <- pivot_longer(week[, c("min recovery", "avg recovery", "max recovery", "min exertion", "avg exertion", "max exertion", "min training success", "avg training success", "max training success", "Athlete ID", "injury")],
!c("Athlete ID", "injury")) # Set variables to use as ID
g <- ggplot(pivotdat,
aes(x = value, fill = factor(injury))) + # Set aesthetics
geom_density(alpha = 0.3) + # Set geom density for density plot
labs(x = "Percieved Metric Values", # Set labels
title = "Injuries v Percieved Metric Values Leading Up to Injury",
fill = "injury") +
facet_wrap(~name, scale = "free") +
theme_bw() + # Set theme
theme(axis.line = element_line(colour = "black"), # Set axis line as black
panel.grid.major = element_blank(), # Remove grid
panel.grid.minor = element_blank(), # Remove grid
panel.border = element_blank(), # Remove grid
panel.background = element_blank()) + # Remove grid
scale_fill_manual(values = c("0" = "blue", "1" = "red"), # Set color values
labels = c("0" = "Healthy", "1" = "Injury"))
# Generate graph
g
gall <- ggplot(pivotdatall,
aes(x = value, fill = factor(injury))) + # Set aesthetics
geom_density(alpha = 0.3) + # Set geom density for density plot
labs(x = "Percieved Metric Values", # Set labels
title = "Injuries v Percieved Metric Values Leading Up to Injury",
fill = "injury") +
facet_wrap(~name, scale = "free") +
theme_bw() + # Set theme
theme(axis.line = element_line(colour = "black"), # Set axis line as black
panel.grid.major = element_blank(), # Remove grid
panel.grid.minor = element_blank(), # Remove grid
panel.border = element_blank(), # Remove grid
panel.background = element_blank()) + # Remove grid
scale_fill_manual(values = c("0" = "blue", "1" = "red"), # Set color values
labels = c("0" = "Healthy", "1" = "Injury"))
# Generate graph
gall
# Lets select top 10 most injured athletes to see if there are any outstanding similarities
athletes <- week[week$`Athlete ID` %in% athlete$`Athlete ID`,]
# Create graph
athlete_ex <- ggplot(athlete1, # Set data
aes(x = `avg recovery.1`, y = `max exertion` , color = factor(injury))) + # Set aesthetics
geom_point(alpha = 0.3) + # Set geom point for scatter plot
labs(x = "Avg Recovery week before Injury", # Set labels
y = "Max Exertion week of Injury",
title = "Max Exertion versus Average Recovery before injury",
fill = "injury") +
theme_bw() + # Set theme
theme(axis.line = element_line(colour = "black"), # Set axis line as black
panel.grid.major = element_blank(), # Remove grid
panel.grid.minor = element_blank(), # Remove grid
panel.border = element_blank(), # Remove grid
panel.background = element_blank()) + # Remove grid
scale_color_manual(values = c("0" = "blue", "1" = "red"), # Set color values
labels = c("0" = "Healthy", "1" = "Injury"))
# Generate graph
ggMarginal(athlete_ex, groupFill = TRUE)
# Create graph
week_ex <- ggplot(week, # Set data
aes(x = `avg recovery.1`, y = `max exertion`,color = factor(injury))) + # Set aesthetics
geom_point(alpha = 0.3) + # Set geom point for scatter plot
labs(x = "Avg Recovery week before Injury", # Set labels
y = "Max Exertion week of Injury",
title = "Max Exertion versus Average Recovery before injury",
fill = "injury") +
theme_bw() + # Set theme
theme(axis.line = element_line(colour = "black"), # Set axis line as black
panel.grid.major = element_blank(), # Remove grid
panel.grid.minor = element_blank(), # Remove grid
panel.border = element_blank(), # Remove grid
panel.background = element_blank()) + # Remove grid
scale_color_manual(values = c("0" = "blue", "1" = "red"), # Set color values
labels = c("0" = "Healthy", "1" = "Injury"))
# Generate graph
ggMarginal(week_ex, groupFill = TRUE)
set.seed(42069) # Set seed for reproducibility
trainIndex <- createDataPartition(y = week$injury, p = 0.75, list = FALSE)
trainData <- week[trainIndex, ]
testData <- week[-trainIndex, ]
validData <- week[week$injury==1,]
# c(1:4,6, 12:22, 73:76)
# Create training data
dtrain_1 <- xgb.DMatrix(data = as.matrix(trainData[,c(1:4,6, 12:22, 36:44, 58:66, 74:77)]), label = ((trainData$injury)))
# Create test data
dtest_1 <- xgb.DMatrix(data = as.matrix(testData[,c(1:4,6, 12:22, 36:44, 58:66, 74:77)]), label = ((testData$injury)))
fit_1 <- xgboost(dtrain_1, # Set data set to use
nrounds = 200, # Set number of rounds
eta = 0.05 ,
verbose = 1, # 1 - Prints out fit
print_every_n = 20, # Prints out result every 20th iteration
objective = "binary:logistic", # Set objective
eval_metric = "auc",
eval_metric = "error")
## [1] train-auc:0.611286 train-error:0.013303
## [21] train-auc:0.627602 train-error:0.013178
## [41] train-auc:0.703593 train-error:0.013209
## [61] train-auc:0.799753 train-error:0.013240
## [81] train-auc:0.866385 train-error:0.013271
## [101] train-auc:0.917969 train-error:0.013240
## [121] train-auc:0.939627 train-error:0.013085
## [141] train-auc:0.949698 train-error:0.012929
## [161] train-auc:0.960221 train-error:0.012742
## [181] train-auc:0.967799 train-error:0.012493
## [200] train-auc:0.974267 train-error:0.012430
preds_injury <- predict(fit_1, dtest_1)
roc1 = roc((testData$injury), preds_injury)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot.roc(roc1, print.auc = TRUE, col = "red", print.auc.col = "red")
pred_class <- rep(0, length(preds_injury))
# If predicted probability above cutoff set as 1
pred_class[which(preds_injury > 0.0145)] <- 1
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 7946 61
## 1 2605 87
##
## Accuracy : 0.7508
## 95% CI : (0.7425, 0.759)
## No Information Rate : 0.9862
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.036
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.75310
## Specificity : 0.58784
## Pos Pred Value : 0.99238
## Neg Pred Value : 0.03232
## Prevalence : 0.98617
## Detection Rate : 0.74269
## Detection Prevalence : 0.74839
## Balanced Accuracy : 0.67047
##
## 'Positive' Class : 0
##
shap_result_1 <- shap.score.rank(xgb_model = fit_1,
X_train = as.matrix(trainData[,c(1:4,6, 12:22, 36:44, 58:66, 74:77)]),
shap_approx = F)
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## make SHAP score by decreasing order
shap_long_1 = shap.prep(shap = shap_result_1,
X_train = as.matrix(trainData[,c(1:4,6, 12:22, 36:44, 58:66, 74:77)]),
top_n = 8)
## Loading required package: ggforce
plot.shap.summary(data_long = shap_long_1)
athletes <- week[week$`Athlete ID` %in% athlete$`Athlete ID`,]